# Read in data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Folder holding the raw exports; defined once so the path is not repeated.
folder <- "C:/Users/Mihai/Desktop/R Notebooks/notebooks/UG-met"
# NOTE(review): setwd() is kept because the relative-path import of
# "mail-grup.xlsx" further down depends on the working directory.
setwd(folder)

# PRE and POST answer exports, read via explicit paths so these two reads do
# not depend on the working directory.
pre <- readr::read_csv(file.path(folder, "PRE_answers_2022.csv"))
post <- readr::read_csv(file.path(folder, "POST_answers_2022.csv"))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## ID df
# Participant lookup table: first four columns of the "incadrari" sheet,
# renamed to Grup / Cond / id / email.
id_df <- rio::import(
  file.path(folder, "Scale complete triate Sofi pa4.xlsx"),
  skip = 0, colNames = FALSE, which = "incadrari"
)
id_df <- id_df[, 1:4]
colnames(id_df) <- c("Grup", "Cond", "id", "email")

# Normalise ids, condition labels and group labels.
id_df <- id_df %>%
  janitor::remove_empty("rows") %>%
  dplyr::mutate(
    id = stringr::str_remove(id, "^0+"),             # remove leading zeros
    id = stringr::str_remove_all(id, "[[:blank:]]"), # remove any white space
    id = toupper(id)
  ) %>%
  dplyr::mutate(
    Cond = stringr::str_replace(Cond, "12CONTROL", "CONTROL"), # fix typo
    Grup = stringr::str_replace(Grup, "burnout", "Burnout"),
    Grup = stringr::str_replace(Grup, "pop generala", "pop gen"),
    Grup = stringr::str_replace(Grup, "old", "pop gen")
  ) %>%
  # rows without a group label are assigned to "pop gen"
  dplyr::mutate(Grup = dplyr::if_else(is.na(Grup), "pop gen", Grup))

# Split the id at the digit/letter boundary to obtain the experiment type;
# the numeric part is discarded and the original id column is kept.
id_df <- id_df %>%
  tidyr::separate(
    id,
    into = c("id_num", "Exp_type"),
    sep = "(?<=[0-9])(?=[A-Za-z])", # ?<= is "look behind"
    remove = FALSE
  ) %>%
  dplyr::select(-id_num) %>%
  dplyr::mutate(
    Exp_type = dplyr::if_else(
      Exp_type %in% c("A", "B", "C", "D", "E", "R", "X"),
      "online",
      Exp_type
    )
  ) %>%
  # emails are lower-cased and stripped of blanks so they can be used as a
  # join key
  dplyr::mutate(
    email = tolower(email),
    email = stringr::str_remove_all(email, "[[:blank:]]")
  )
# Read in data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE(review): the import below uses a relative path, so it depends on this
# setwd() call.
setwd("C:/Users/Mihai/Desktop/R Notebooks/notebooks/UG-met")
email_df <- rio::import("mail-grup.xlsx", skip = 3)

# Hard-coded row/column windows of the sheet.
# NOTE(review): presumably rows 1-42 are the GSR sample and rows 181-197 the
# RMN sample -- confirm against the workbook.
gsr_df <- email_df[1:42, 1:4]
names(gsr_df)[1:4] <- c("Group", "Cond", "ID", "email")

rmn_df <- email_df[181:197, 2:4]
names(rmn_df)[1:3] <- c("Nr_Crt", "ID", "email")
# every row of rmn_df is labelled EXPERIMENTAL
rmn_df$Cond <- rep("EXPERIMENTAL", nrow(rmn_df))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Exclude known test-IDs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Usernames removed outright from both data sets.
excluded_id <- c(
  "bica.andreea21@gmail.com", "ioana.r.podina@gmail.com", "test@ro",
  "rozetadraghici@gmail.com", "ioana.podina@fpse.unibuc.ro",
  "cociaioana@gmail.com"
)
# Usernames matching this pattern are removed as well.
varstnici_pattern <- "PA1"

pre <- pre %>%
  dplyr::filter(!Username %in% excluded_id) %>%
  dplyr::filter(!stringr::str_detect(Username, varstnici_pattern))

post <- post %>%
  dplyr::filter(!Username %in% excluded_id) %>%
  dplyr::filter(!stringr::str_detect(Username, varstnici_pattern))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Check & Exclude IDs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Print the number of rows per Username for visual inspection.
pre %>%
  dplyr::count(Username) %>%
  print(n = Inf) # "luciana.haloiu@invingemautismul.ro" has 24 trials instead of 12

post %>%
  dplyr::count(Username) %>%
  print(n = Inf)

# Exclude last 12 trials of "luciana.haloiu@invingemautismul.ro"; the data are
# ungrouped again afterwards so later steps do not inherit the grouping.
pre <- pre %>%
  dplyr::group_by(Username) %>%
  dplyr::filter(
    !(Username == "luciana.haloiu@invingemautismul.ro" &
      dplyr::row_number() %in% 13:24)
  ) %>%
  dplyr::ungroup()

pre$Timestamp[pre$Username == "luciana.haloiu@invingemautismul.ro"] # check: 12th trial is at "2020-11-23 16:40:59 UTC"
# Console output recorded from the check above:
# [1] "2020-11-23 16:38:20 UTC" "2020-11-23 16:38:37 UTC" "2020-11-23 16:38:52 UTC" "2020-11-23 16:39:05 UTC" "2020-11-23 16:39:16 UTC"
# [6] "2020-11-23 16:39:30 UTC" "2020-11-23 16:39:40 UTC" "2020-11-23 16:39:58 UTC" "2020-11-23 16:40:13 UTC" "2020-11-23 16:40:26 UTC"
# [11] "2020-11-23 16:40:39 UTC" "2020-11-23 16:40:59 UTC"
# Side-by-side table of the Usernames present in PRE and in POST; a Username
# missing from one side shows up as NA in that column.
pre_ids <- data.frame(pre = unique(pre$Username))
post_ids <- data.frame(post = unique(post$Username))
list_ids <- dplyr::full_join(pre_ids, post_ids, by = c("pre" = "post"), keep = TRUE)
list_ids

# Usernames found on both sides, as a character vector.
# NOTE(review): the final pull() is reconstructed -- the pipeline in the
# source ended in a dangling pipe, and the `%in%` filters below need a vector.
complete_ids <- list_ids %>%
  tidyr::drop_na() %>%
  dplyr::mutate(pre = as.character(pre)) %>%
  dplyr::pull(pre)

# Keep only IDs that have both PRE and POST
pre <- pre %>%
  dplyr::filter(Username %in% complete_ids)

post <- post %>%
  dplyr::filter(Username %in% complete_ids)
### Radical exclusions (keep only the first 12 observations)
# Keep at most the first 12 rows of each Username (in current row order),
# then drop the grouping so later steps start from ungrouped data.
pre <- pre %>%
  dplyr::group_by(Username) %>%
  dplyr::filter(dplyr::row_number() <= 12) %>%
  dplyr::ungroup()

post <- post %>%
  dplyr::group_by(Username) %>%
  dplyr::filter(dplyr::row_number() <= 12) %>%
  dplyr::ungroup()
# some Usernames are emails, some are IDs in form of emails
# Derive an `id_user` column from `Username` and normalise `Username`.
#
# id_user is the part of Username before "@", kept only when it contains
# "A10" or "GSR" (NA otherwise), then stripped of leading zeros and blanks and
# upper-cased. Username itself is lower-cased and stripped of blanks.
# NOTE(review): the "A10|GSR" match is case-sensitive and runs before
# toupper(), so lower-case variants end up as NA -- confirm this is intended.
add_id_user <- function(answers) {
  answers %>%
    dplyr::mutate(id_user = Username) %>%
    dplyr::select(Username, id_user, dplyr::everything()) %>%
    dplyr::mutate(id_user = stringr::str_remove(id_user, "@.*")) %>%
    dplyr::mutate(
      id_user = dplyr::if_else(
        stringr::str_detect(id_user, "A10|GSR"),
        id_user,
        NA_character_
      )
    ) %>%
    dplyr::mutate(
      id_user = stringr::str_remove(id_user, "^0+"),             # remove leading zeros
      id_user = stringr::str_remove_all(id_user, "[[:blank:]]"), # remove any white space
      id_user = toupper(id_user)
    ) %>%
    dplyr::mutate(
      Username = tolower(Username),
      Username = stringr::str_remove_all(Username, "[[:blank:]]")
    )
}

pre <- add_id_user(pre)
post <- add_id_user(post)
# Exclude subjects from A10 (RMN sample)
# Drop every row whose Username contains "a10" (case-sensitive match; rows
# with a missing Username are dropped too, because filter() discards NA).
pre <- pre %>%
  dplyr::filter(!stringr::str_detect(Username, "a10"))

post <- post %>%
  dplyr::filter(!stringr::str_detect(Username, "a10"))
# Merge
# Attach Grup / Cond / Exp_type from the lookup table to the answers.
#
# The lookup is joined twice: first on Username == email, then on
# id_user == id (the second join's clashing columns get the ".x" suffix).
# Values from the email match win; the id match fills in where the email
# match gave NA. Rows left without Grup or Cond are dropped.
# The `email = coalesce(email, email)` step of the earlier version was a
# no-op and is omitted.
attach_group_info <- function(answers, lookup) {
  answers %>%
    dplyr::left_join(lookup, by = c("Username" = "email")) %>%
    dplyr::left_join(lookup, by = c("id_user" = "id"), suffix = c("", ".x")) %>%
    dplyr::mutate(
      Grup = dplyr::coalesce(Grup, Grup.x),
      Cond = dplyr::coalesce(Cond, Cond.x),
      Exp_type = dplyr::coalesce(Exp_type, Exp_type.x)
    ) %>%
    dplyr::select(!dplyr::contains(".x")) %>%
    dplyr::filter(!is.na(Grup), !is.na(Cond))
}

pre_united <- attach_group_info(pre, id_df)
post_united <- attach_group_info(post, id_df)
# To keep rest of code working
pre <- pre_united
post <- post_united

# One row per distinct Username / Grup / Cond combination.
# distinct() matters: the trial-level tables hold many rows per Username, and
# joining them to the score table by Username would otherwise repeat every
# score row once per trial row.
merged_united <- dplyr::bind_rows(pre_united, post_united) %>%
  dplyr::ungroup() %>%
  dplyr::select(Username, Grup, Cond) %>%
  dplyr::distinct()
# Compute scores ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# only Decident is used: unfair = 17-24 (6 different offers); fair = 49-54 (6 different offers)
# Define Fair/Unfair
# cut() uses right-closed intervals by default, so Decident <= 30 is labelled
# "Unfair" and Decident > 30 is labelled "Fair".
pre <- pre %>%
  dplyr::mutate(
    Type = cut(Decident, breaks = c(-Inf, 30, Inf), labels = c("Unfair", "Fair"))
  )

post <- post %>%
  dplyr::mutate(
    Type = cut(Decident, breaks = c(-Inf, 30, Inf), labels = c("Unfair", "Fair"))
  )
# Compute Percent scores
# Percentage of accepted offers per Username and Type.
#
# answers: trial-level data with Username, Type and Accepted columns.
# phase:   "Pre" or "Post"; stored in the PrePost factor (levels Pre, Post).
# Returns one row per Username x Type for Accepted == "Y", with
# Percent = 100 * n_Accepted_Type / n_Type. Combinations that never occur in
# the data are added by complete() with counts and Percent set to 0.
compute_percent_scores <- function(answers, phase) {
  answers %>%
    dplyr::add_count(Username, Type, name = "n_Type") %>%
    # need .drop = FALSE for missing factor levels leading to 0% Percentages
    dplyr::count(
      Username, Type, Accepted, n_Type,
      name = "n_Accepted_Type", .drop = FALSE
    ) %>%
    dplyr::mutate(Percent = 100 * n_Accepted_Type / n_Type) %>%
    dplyr::ungroup() %>%
    # for missing factor levels leading to 0% Percentages
    tidyr::complete(
      Username, Type, Accepted,
      fill = list(n_Type = 0, n_Accepted_Type = 0, Percent = 0)
    ) %>%
    dplyr::filter(Accepted == "Y") %>%
    dplyr::mutate(PrePost = factor(phase, levels = c("Pre", "Post")))
}

pre_scores <- compute_percent_scores(pre, "Pre")
post_scores <- compute_percent_scores(post, "Post")
# Stack the Pre and Post score tables and attach Grup / Cond by Username.
merged_scores <- rbind(pre_scores, post_scores) %>%
  dplyr::left_join(merged_united, by = "Username")
# Pre vs Post comparison of Percent, one panel per Type, for each
# Grup x Cond subset. "pop gen" uses parametric tests; PTSD and Burnout use
# non-parametric tests ("np").
# NOTE(review): the function name after `ggstatsplot::` was lost in the
# source; grouped_ggwithinstats() is assumed from the arguments
# (grouping.var, pairwise.*, annotation.args) and the repeated-measures
# Pre/Post design -- confirm.
merged_scores %>%
  dplyr::filter(Grup == "pop gen", Cond == "EXPERIMENTAL") %>%
  ggstatsplot::grouped_ggwithinstats(
    data = .,
    x = PrePost,
    y = Percent,
    grouping.var = Type,
    type = "parametric",
    pairwise.comparisons = TRUE,
    pairwise.display = "all",
    annotation.args = list(title = "Populatie Genearala - TR")
  )

merged_scores %>%
  dplyr::filter(Grup == "pop gen", Cond == "CONTROL") %>%
  ggstatsplot::grouped_ggwithinstats(
    data = .,
    x = PrePost,
    y = Percent,
    grouping.var = Type,
    type = "parametric",
    pairwise.comparisons = TRUE,
    pairwise.display = "all",
    annotation.args = list(title = "Populatie Genearala - CTRL")
  )

merged_scores %>%
  dplyr::filter(Grup == "PTSD", Cond == "EXPERIMENTAL") %>%
  ggstatsplot::grouped_ggwithinstats(
    data = .,
    x = PrePost,
    y = Percent,
    grouping.var = Type,
    type = "np",
    pairwise.comparisons = TRUE,
    pairwise.display = "all",
    annotation.args = list(title = "PTSD - TR")
  )

merged_scores %>%
  dplyr::filter(Grup == "PTSD", Cond == "CONTROL") %>%
  ggstatsplot::grouped_ggwithinstats(
    data = .,
    x = PrePost,
    y = Percent,
    grouping.var = Type,
    type = "np",
    pairwise.comparisons = TRUE,
    pairwise.display = "all",
    annotation.args = list(title = "PTSD - CTRL")
  )

merged_scores %>%
  dplyr::filter(Grup == "Burnout", Cond == "EXPERIMENTAL") %>%
  ggstatsplot::grouped_ggwithinstats(
    data = .,
    x = PrePost,
    y = Percent,
    grouping.var = Type,
    type = "np",
    pairwise.comparisons = TRUE,
    pairwise.display = "all",
    annotation.args = list(title = "Burnout - TR")
  )

merged_scores %>%
  dplyr::filter(Grup == "Burnout", Cond == "CONTROL") %>%
  ggstatsplot::grouped_ggwithinstats(
    data = .,
    x = PrePost,
    y = Percent,
    grouping.var = Type,
    type = "np",
    pairwise.comparisons = TRUE,
    pairwise.display = "all",
    annotation.args = list(title = "Burnout - CTRL")
  )
